(a) Simple linear regression for each variable
# Fit one simple linear regression of crim on each remaining column of dat.
# The response is excluded by exact name: a substring match such as
# grepl("crim", ...) would also drop any predictor whose name merely
# contains "crim".
nm <- names(dat)[names(dat) != "crim"]
# The formula expression is kept exactly as written because it is what the
# printed "Fitting linear model:" caption of each fit shows.
fit.simple <- lapply(nm, function(x) {
  lm(as.formula(paste0("crim ~ ", x)), data = dat)
})
# Run mySummary (defined elsewhere; prints the lm summary and graphs) on every
# fit. invisible() keeps the list returned by lapply() from being auto-printed.
invisible(lapply(fit.simple, mySummary))
Model: crim = 4.454 × (Intercept) + -0.074 × zn
| (Intercept) |
4.454 |
0.4172 |
10.67 |
4.038e-24 |
| zn |
-0.07393 |
0.01609 |
-4.594 |
5.506e-06 |
Fitting linear model: as.formula(paste0(“crim ~”, x))
| 506 |
8.435 |
0.04019 |
0.03828 |
Model: crim = -2.064 × (Intercept) + 0.51 × indus
| (Intercept) |
-2.064 |
0.6672 |
-3.093 |
0.002091 |
| indus |
0.5098 |
0.05102 |
9.991 |
1.45e-21 |
Fitting linear model: as.formula(paste0(“crim ~”, x))
| 506 |
7.866 |
0.1653 |
0.1637 |
Model: crim = 3.744 × (Intercept) + -1.893 × chas
| (Intercept) |
3.744 |
0.3961 |
9.453 |
1.24e-19 |
| chas |
-1.893 |
1.506 |
-1.257 |
0.2094 |
Fitting linear model: as.formula(paste0(“crim ~”, x))
| 506 |
8.597 |
0.003124 |
0.001146 |
Model: crim = -13.72 × (Intercept) + 31.249 × nox
| (Intercept) |
-13.72 |
1.699 |
-8.073 |
5.077e-15 |
| nox |
31.25 |
2.999 |
10.42 |
3.752e-23 |
Fitting linear model: as.formula(paste0(“crim ~”, x))
| 506 |
7.81 |
0.1772 |
0.1756 |
Model: crim = 20.482 × (Intercept) + -2.684 × rm
| (Intercept) |
20.48 |
3.364 |
6.088 |
2.272e-09 |
| rm |
-2.684 |
0.532 |
-5.045 |
6.347e-07 |
Fitting linear model: as.formula(paste0(“crim ~”, x))
| 506 |
8.401 |
0.04807 |
0.04618 |
Model: crim = -3.778 × (Intercept) + 0.108 × age
| (Intercept) |
-3.778 |
0.944 |
-4.002 |
7.222e-05 |
| age |
0.1078 |
0.01274 |
8.463 |
2.855e-16 |
Fitting linear model: as.formula(paste0(“crim ~”, x))
| 506 |
8.057 |
0.1244 |
0.1227 |
Model: crim = 9.499 × (Intercept) + -1.551 × dis
| (Intercept) |
9.499 |
0.7304 |
13.01 |
1.503e-33 |
| dis |
-1.551 |
0.1683 |
-9.213 |
8.52e-19 |
Fitting linear model: as.formula(paste0(“crim ~”, x))
| 506 |
7.965 |
0.1441 |
0.1425 |
Model: crim = -2.287 × (Intercept) + 0.618 × rad
| (Intercept) |
-2.287 |
0.4435 |
-5.157 |
3.606e-07 |
| rad |
0.6179 |
0.03433 |
18 |
2.694e-56 |
Fitting linear model: as.formula(paste0(“crim ~”, x))
| 506 |
6.718 |
0.3913 |
0.39 |
Model: crim = -8.528 × (Intercept) + 0.03 × tax
| (Intercept) |
-8.528 |
0.8158 |
-10.45 |
2.774e-23 |
| tax |
0.02974 |
0.001847 |
16.1 |
2.357e-47 |
Fitting linear model: as.formula(paste0(“crim ~”, x))
| 506 |
6.997 |
0.3396 |
0.3383 |
Model: crim = -17.647 × (Intercept) + 1.152 × ptratio
| (Intercept) |
-17.65 |
3.147 |
-5.607 |
3.395e-08 |
| ptratio |
1.152 |
0.1694 |
6.801 |
2.943e-11 |
Fitting linear model: as.formula(paste0(“crim ~”, x))
| 506 |
8.24 |
0.08407 |
0.08225 |
Model: crim = 16.554 × (Intercept) + -0.036 × black
| (Intercept) |
16.55 |
1.426 |
11.61 |
8.922e-28 |
| black |
-0.03628 |
0.003873 |
-9.367 |
2.487e-19 |
Fitting linear model: as.formula(paste0(“crim ~”, x))
| 506 |
7.946 |
0.1483 |
0.1466 |
Model: crim = -3.331 × (Intercept) + 0.549 × lstat
| (Intercept) |
-3.331 |
0.6938 |
-4.801 |
2.087e-06 |
| lstat |
0.5488 |
0.04776 |
11.49 |
2.654e-27 |
Fitting linear model: as.formula(paste0(“crim ~”, x))
| 506 |
7.664 |
0.2076 |
0.206 |
Model: crim = 11.797 × (Intercept) + -0.363 × medv
| (Intercept) |
11.8 |
0.9342 |
12.63 |
5.934e-32 |
| medv |
-0.3632 |
0.03839 |
-9.46 |
1.174e-19 |
Fitting linear model: as.formula(paste0(“crim ~”, x))
| 506 |
7.934 |
0.1508 |
0.1491 |

A statistically significant association between the predictor and the response was observed for every variable except chas.
(b) Multiple regression with all the variables
# Multiple regression: model crim on every other column of dat at once
# (the `.` on the right-hand side of the formula).
fit.all <- lm(formula = crim ~ ., data = dat)
# Report the fitted model through the same mySummary helper used for the
# single-predictor fits.
mySummary(fit.all)
Model: crim = 17.033 × (Intercept) + 0.045 × zn + -0.064 × indus + -0.749 × chas + -10.314 × nox + 0.43 × rm + 0.001 × age + -0.987 × dis + 0.588 × rad + -0.004 × tax + -0.271 × ptratio + -0.008 × black + 0.126 × lstat + -0.199 × medv
| Term | Estimate | Std. Error | t value | Pr(>\|t\|) |
|---|---|---|---|---|
| (Intercept) | 17.03 | 7.235 | 2.354 | 0.01895 |
| zn | 0.04486 | 0.01873 | 2.394 | 0.01702 |
| indus | -0.06385 | 0.08341 | -0.7656 | 0.4443 |
| chas | -0.7491 | 1.18 | -0.6348 | 0.5259 |
| nox | -10.31 | 5.276 | -1.955 | 0.05115 |
| rm | 0.4301 | 0.6128 | 0.7019 | 0.4831 |
| age | 0.001452 | 0.01793 | 0.08098 | 0.9355 |
| dis | -0.9872 | 0.2818 | -3.503 | 0.0005022 |
| rad | 0.5882 | 0.08805 | 6.68 | 6.46e-11 |
| tax | -0.00378 | 0.005156 | -0.7332 | 0.4638 |
| ptratio | -0.2711 | 0.1865 | -1.454 | 0.1466 |
| black | -0.007538 | 0.003673 | -2.052 | 0.0407 |
| lstat | 0.1262 | 0.07572 | 1.667 | 0.09621 |
| medv | -0.1989 | 0.06052 | -3.287 | 0.001087 |

Fitting linear model: crim ~ .

| Observations | Residual Std. Error | R² | Adjusted R² |
|---|---|---|---|
| 506 | 6.439 | 0.454 | 0.4396 |

At the 5% significance level, the significant predictors are zn, dis, rad, black, and medv.
(c) (a) and (b) results comparison plot
# Slope (second coefficient) of each single-predictor fit. vapply() pins the
# result to exactly one double per model, unlike sapply(), whose return type
# depends on its input. The coefficient names are attached explicitly because
# vapply() takes names from its input list, not from FUN's result.
x <- vapply(fit.simple, function(fit) coefficients(fit)[[2]], numeric(1))
names(x) <- vapply(
  fit.simple,
  function(fit) names(coefficients(fit))[2],
  character(1)
)
# Pick the matching multiple-regression coefficients by name rather than by
# position, so each pair always refers to the same term.
y <- coefficients(fit.all)[names(x)]
df <- data.table(
  Variable = names(x),
  Simple = x,
  Multiple = y,
  `Absolute difference` = abs(x - y)
)
# Interactive scatter plot: simple-regression slope on the x axis against the
# multiple-regression coefficient on the y axis, one marker per variable.
fig <- plot_ly(
  df,
  x = ~Simple, y = ~Multiple,
  type = "scatter", mode = "markers",
  hoverinfo = "text",
  text = ~paste(
    "</br> Variable: ", Variable,
    "</br> Simple regression: ", round(Simple, 3),
    "</br> Multiple regression: ", round(Multiple, 3)
  )
)
fig <- fig %>% layout(
  title = "(a) and (b) results comparison plot",
  xaxis = list(title = "Simple regression for each variable"),
  yaxis = list(title = "Multiple regression")
)
fig
# Same coefficients as a table, largest absolute disagreement first.
df[order(-`Absolute difference`)] %>%
  xtable() %>%
  kable() %>%
  kable_styling(position = "left", bootstrap_options = "hover")
| Variable | Simple | Multiple | Absolute difference |
|---|---|---|---|
| nox | 31.2485312 | -10.3135349 | 41.5620661 |
| rm | -2.6840512 | 0.4301305 | 3.1141817 |
| ptratio | 1.1519828 | -0.2710806 | 1.4230633 |
| chas | -1.8927766 | -0.7491336 | 1.1436429 |
| indus | 0.5097763 | -0.0638548 | 0.5736312 |
| dis | -1.5509017 | -0.9871757 | 0.5637260 |
| lstat | 0.5488048 | 0.1262114 | 0.4225934 |
| medv | -0.3631599 | -0.1988868 | 0.1642731 |
| zn | -0.0739350 | 0.0448552 | 0.1187902 |
| age | 0.1077862 | 0.0014516 | 0.1063346 |
| tax | 0.0297423 | -0.0037800 | 0.0335223 |
| rad | 0.6179109 | 0.5882086 | 0.0297023 |
| black | -0.0362796 | -0.0075375 | 0.0287421 |
(d) Cubic model
# Keep only the columns whose class is exactly "numeric". identical() always
# yields a single TRUE/FALSE, whereas class(x) == "numeric" returns one value
# per class entry and would misalign Filter() for a multi-class column.
# NOTE(review): this test skips integer-typed columns; the cubic output has no
# chas or rad fits, presumably for that reason -- confirm rad is meant to be
# left out.
df.num <- Filter(function(x) identical(class(x), "numeric"), dat)
# Exclude the response by exact name; a substring match such as
# grepl("crim", ...) would also drop any predictor whose name contains "crim".
nm.num <- names(df.num)[names(df.num) != "crim"]
# One cubic fit per predictor: crim ~ poly(<predictor>, 3). The formula
# expression is kept exactly as written because it is what the printed
# "Fitting linear model:" caption of each fit shows.
fit.cubic <- lapply(nm.num, function(x) {
  lm(as.formula(paste0("crim ~ ", paste0("poly(", x, ",3)"))), data = df.num)
})
# Run mySummary (defined elsewhere; prints the lm summary and graphs) on every
# fit. invisible() keeps the list returned by lapply() from being auto-printed.
invisible(lapply(fit.cubic, mySummary))
Model: crim = 3.614 × (Intercept) + -38.75 × poly(zn, 3)1 + 23.94 × poly(zn, 3)2 + -10.072 × poly(zn, 3)3
| (Intercept) |
3.614 |
0.3722 |
9.709 |
1.547e-20 |
| poly(zn, 3)1 |
-38.75 |
8.372 |
-4.628 |
4.698e-06 |
| poly(zn, 3)2 |
23.94 |
8.372 |
2.859 |
0.004421 |
| poly(zn, 3)3 |
-10.07 |
8.372 |
-1.203 |
0.2295 |
Fitting linear model: as.formula(paste0(“crim ~”, paste0(“poly(”, x, “,3)”)))
| 506 |
8.372 |
0.05824 |
0.05261 |
Model: crim = 3.614 × (Intercept) + 78.591 × poly(indus, 3)1 + -24.395 × poly(indus, 3)2 + -54.13 × poly(indus, 3)3
| (Intercept) |
3.614 |
0.33 |
10.95 |
3.606e-25 |
| poly(indus, 3)1 |
78.59 |
7.423 |
10.59 |
8.854e-24 |
| poly(indus, 3)2 |
-24.39 |
7.423 |
-3.286 |
0.001086 |
| poly(indus, 3)3 |
-54.13 |
7.423 |
-7.292 |
1.196e-12 |
Fitting linear model: as.formula(paste0(“crim ~”, paste0(“poly(”, x, “,3)”)))
| 506 |
7.423 |
0.2597 |
0.2552 |
Model: crim = 3.614 × (Intercept) + 81.372 × poly(nox, 3)1 + -28.829 × poly(nox, 3)2 + -60.362 × poly(nox, 3)3
| (Intercept) |
3.614 |
0.3216 |
11.24 |
2.743e-26 |
| poly(nox, 3)1 |
81.37 |
7.234 |
11.25 |
2.457e-26 |
| poly(nox, 3)2 |
-28.83 |
7.234 |
-3.985 |
7.737e-05 |
| poly(nox, 3)3 |
-60.36 |
7.234 |
-8.345 |
6.961e-16 |
Fitting linear model: as.formula(paste0(“crim ~”, paste0(“poly(”, x, “,3)”)))
| 506 |
7.234 |
0.297 |
0.2928 |
Model: crim = 3.614 × (Intercept) + -42.379 × poly(rm, 3)1 + 26.577 × poly(rm, 3)2 + -5.51 × poly(rm, 3)3
| (Intercept) |
3.614 |
0.3703 |
9.758 |
1.027e-20 |
| poly(rm, 3)1 |
-42.38 |
8.33 |
-5.088 |
5.128e-07 |
| poly(rm, 3)2 |
26.58 |
8.33 |
3.191 |
0.001509 |
| poly(rm, 3)3 |
-5.51 |
8.33 |
-0.6615 |
0.5086 |
Fitting linear model: as.formula(paste0(“crim ~”, paste0(“poly(”, x, “,3)”)))
| 506 |
8.33 |
0.06779 |
0.06222 |
Model: crim = 3.614 × (Intercept) + 68.182 × poly(age, 3)1 + 37.484 × poly(age, 3)2 + 21.353 × poly(age, 3)3
| (Intercept) |
3.614 |
0.3485 |
10.37 |
5.919e-23 |
| poly(age, 3)1 |
68.18 |
7.84 |
8.697 |
4.879e-17 |
| poly(age, 3)2 |
37.48 |
7.84 |
4.781 |
2.291e-06 |
| poly(age, 3)3 |
21.35 |
7.84 |
2.724 |
0.00668 |
Fitting linear model: as.formula(paste0(“crim ~”, paste0(“poly(”, x, “,3)”)))
| 506 |
7.84 |
0.1742 |
0.1693 |
Model: crim = 3.614 × (Intercept) + -73.389 × poly(dis, 3)1 + 56.373 × poly(dis, 3)2 + -42.622 × poly(dis, 3)3
| (Intercept) |
3.614 |
0.3259 |
11.09 |
1.06e-25 |
| poly(dis, 3)1 |
-73.39 |
7.331 |
-10.01 |
1.253e-21 |
| poly(dis, 3)2 |
56.37 |
7.331 |
7.689 |
7.87e-14 |
| poly(dis, 3)3 |
-42.62 |
7.331 |
-5.814 |
1.089e-08 |
Fitting linear model: as.formula(paste0(“crim ~”, paste0(“poly(”, x, “,3)”)))
| 506 |
7.331 |
0.2778 |
0.2735 |
Model: crim = 3.614 × (Intercept) + 112.646 × poly(tax, 3)1 + 32.087 × poly(tax, 3)2 + -7.997 × poly(tax, 3)3
| (Intercept) |
3.614 |
0.3047 |
11.86 |
8.956e-29 |
| poly(tax, 3)1 |
112.6 |
6.854 |
16.44 |
6.976e-49 |
| poly(tax, 3)2 |
32.09 |
6.854 |
4.682 |
3.665e-06 |
| poly(tax, 3)3 |
-7.997 |
6.854 |
-1.167 |
0.2439 |
Fitting linear model: as.formula(paste0(“crim ~”, paste0(“poly(”, x, “,3)”)))
| 506 |
6.854 |
0.3689 |
0.3651 |
Model: crim = 3.614 × (Intercept) + 56.045 × poly(ptratio, 3)1 + 24.775 × poly(ptratio, 3)2 + -22.28 × poly(ptratio, 3)3
| (Intercept) |
3.614 |
0.361 |
10.01 |
1.271e-21 |
| poly(ptratio, 3)1 |
56.05 |
8.122 |
6.901 |
1.565e-11 |
| poly(ptratio, 3)2 |
24.77 |
8.122 |
3.05 |
0.002405 |
| poly(ptratio, 3)3 |
-22.28 |
8.122 |
-2.743 |
0.006301 |
Fitting linear model: as.formula(paste0(“crim ~”, paste0(“poly(”, x, “,3)”)))
| 506 |
8.122 |
0.1138 |
0.1085 |
Model: crim = 3.614 × (Intercept) + -74.431 × poly(black, 3)1 + 5.926 × poly(black, 3)2 + -4.835 × poly(black, 3)3
| (Intercept) |
3.614 |
0.3536 |
10.22 |
2.14e-22 |
| poly(black, 3)1 |
-74.43 |
7.955 |
-9.357 |
2.73e-19 |
| poly(black, 3)2 |
5.926 |
7.955 |
0.745 |
0.4566 |
| poly(black, 3)3 |
-4.835 |
7.955 |
-0.6078 |
0.5436 |
Fitting linear model: as.formula(paste0(“crim ~”, paste0(“poly(”, x, “,3)”)))
| 506 |
7.955 |
0.1498 |
0.1448 |
Model: crim = 3.614 × (Intercept) + 88.07 × poly(lstat, 3)1 + 15.888 × poly(lstat, 3)2 + -11.574 × poly(lstat, 3)3
| (Intercept) |
3.614 |
0.3392 |
10.65 |
4.939e-24 |
| poly(lstat, 3)1 |
88.07 |
7.629 |
11.54 |
1.678e-27 |
| poly(lstat, 3)2 |
15.89 |
7.629 |
2.082 |
0.0378 |
| poly(lstat, 3)3 |
-11.57 |
7.629 |
-1.517 |
0.1299 |
Fitting linear model: as.formula(paste0(“crim ~”, paste0(“poly(”, x, “,3)”)))
| 506 |
7.629 |
0.2179 |
0.2133 |
Model: crim = 3.614 × (Intercept) + -75.058 × poly(medv, 3)1 + 88.086 × poly(medv, 3)2 + -48.033 × poly(medv, 3)3
| (Intercept) |
3.614 |
0.292 |
12.37 |
7.024e-31 |
| poly(medv, 3)1 |
-75.06 |
6.569 |
-11.43 |
4.931e-27 |
| poly(medv, 3)2 |
88.09 |
6.569 |
13.41 |
2.929e-35 |
| poly(medv, 3)3 |
-48.03 |
6.569 |
-7.312 |
1.047e-12 |
Fitting linear model: as.formula(paste0(“crim ~”, paste0(“poly(”, x, “,3)”)))
| 506 |
6.569 |
0.4202 |
0.4167 |
